# @author : 陈世玉
# @name   : 微博热点.py  (Weibo trending-topics scraper)
# @time   : 2024/12/5 08:34
import time

import requests
from bs4 import BeautifulSoup  # parsed with the 'lxml' backend below

def get_weibo(q, page):
    """Build the Weibo topic-search URL for query q and the given page number."""
    return ('https://s.weibo.com/topic?q=' + q
            + '&pagetype=topic&topic=1&Refer=weibo_topic&page=' + str(page))
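
# A hedged alternative sketch, not part of the original script: percent-encode the
# query with the standard library so the URL is valid before it is sent (requests
# would otherwise encode the non-ASCII query itself). The name get_weibo_encoded
# is hypothetical.
from urllib.parse import quote

def get_weibo_encoded(q, page):
    # quote() turns e.g. '大学生就业' into its %-escaped UTF-8 form.
    return ('https://s.weibo.com/topic?q=' + quote(q)
            + '&pagetype=topic&topic=1&Refer=weibo_topic&page=' + str(page))
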
def get_html(url):
    """Fetch a search page; the cookie carries a logged-in Weibo session and will expire."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'cookie': 'SINAGLOBAL=8206231692013.157.1732752924277; SCF=AlL8NJ_NyeO6fZMVL_Ld35itqfbAxFNLc0mGHdrlbF7IVRCFyxlhey5dGRCgiEVx2oId0CQYRdhfc1u0DZ_YEts.; SUB=_2A25KVIssDeRhGeFG7FAQ9SfNyz2IHXVpK4LkrDV8PUNbmtAYLRnjkW9NeOeNu1jsCw2zKRyY32Su53SbzYh3sJjS; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhNi_8XjI10PQq5cwPOcNil5NHD95QN1hMEeK-4eK5pWs4DqcjQi--Ri-isi-i2i--ci-iWi-8si--NiKnRi-zpi--ciKn4iK.0-8HFx7tt; ALF=02_1735952508; XSRF-TOKEN=JPwQ9dfAXRRhL-CiqUafNl9T; WBPSESS=RE5DW7rtioecyjp4RhkOS_uMeTw8f26b27Nk76QuemmZI2NvzHj7HeGdENsP4WqIDd-E1yOOA1J-hXcPwVswKroOuB1O2p-Z8sLLh56G_ke0U0D2U3oys_GfPi-7XrKexHPqW7zCq2rcN0Yk4RmVIQ==; _s_tentry=weibo.com; Apache=4028671889877.109.1733360621611; ULV=1733360621645:2:1:1:4028671889877.109.1733360621611:1732752924282',
        'referer': 'https://s.weibo.com'
    }
    res = requests.get(url, headers=headers)
    res.encoding = 'utf-8'
    return res.text
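
# A hedged variant, not in the original: fail fast on HTTP errors and avoid
# hanging forever, since an expired session cookie often comes back as a
# redirect or error page. fetch_html is a hypothetical name for this sketch.
def fetch_html(url, headers):
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()  # raises requests.HTTPError on 4xx/5xx status codes
    res.encoding = 'utf-8'
    return res.text
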
def get_data(html):
    """Parse every '.info' result block and append 'topic heat' lines to data.txt."""
    soup = BeautifulSoup(html, 'lxml')
    # The CSS selector '.info' matches the same nodes as the XPath //div[@class="info"].
    title_list = soup.select('.info')
    with open('data.txt', 'a', encoding='utf-8') as f:
        for item in title_list:
            # Strip surrounding whitespace, then the newlines inside the text.
            title = item.get_text().strip().replace('\n', '')
            # Entries look like '#topic# heat': the topic sits between the
            # leading '#' and the last '#'; the heat count follows the last '#'.
            lastindex = title.rfind('#')
            jiaodian = title[1:lastindex]   # topic text
            redu = title[lastindex + 1:]    # heat/engagement count
            print(title)
            f.write(jiaodian + " " + redu + '\n')
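
# A hedged alternative sketch for the string slicing above, assuming entries keep
# the '#topic# heat' shape: a regex makes the two fields explicit and returns None
# for malformed entries instead of writing garbage. parse_entry is a hypothetical
# helper, not part of the original script.
import re

def parse_entry(title):
    # Greedy '#(.+)#' matches up to the LAST '#', mirroring rfind('#') above.
    m = re.match(r'#(.+)#\s*(.*)', title)
    return (m.group(1), m.group(2)) if m else None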




if __name__ == '__main__':
    q = '大学生就业'  # search topic: "college student employment"
    page = int(input('Enter the number of pages to scrape: '))
    for i in range(1, page + 1):
        url = get_weibo(q, i)
        html = get_html(url)
        get_data(html)
        # Not in the original loop, but the otherwise-unused time import suggests
        # a pause between requests was intended; 1s keeps the crawl polite.
        time.sleep(1)

